cd "<filepath>"

* Automatically generate new log file
local c_date = c(current_date)
local c_time = c(current_time)

local c_time_date = "`c_date'"+"_"+"`c_time'"
local time_str = subinstr("`c_time_date'", ":", "_", .)
local time_str = subinstr("`time_str'", " ", "_", .)
di "`time_str'"


log using "Replication_RI_`time_str'_NC 2016.txt", text

*=============================================
* NC 2016 GOTV Program
* -> Analysis <-
* -> Randomization Inference <-
*=============================================

* Set Directory
cd "<filepath>"


* Expand available matrix size
set matsize 10000

* Set results buffer (lines retained)
set scrollbufsize 150000

* Stata version
version

use "Replication Data - NC 2016.dta", clear

* Covariates for analysis
local balancevars "electiondayage female reg_year g12 g08 cd_2 cd_3 cd_4 cd_5 cd_6 cd_7 cd_8 cd_9 cd_10 cd_11 cd_12 cd_13 best_gtop eipv1 eipv2"


* Catalist ethnicity code
gen mexican = ethnicity == "Mexican"
gen mex_eng = english * mexican
gen mex_bil = bilingual * mexican
	label var mexican "Catalist: Mexican American"
	label var mex_eng "English * Mexican American"
	label var mex_bil "Bilingual * Mexican American"

	
*----------------------------------------
* Set RI paramaters & postfile
*----------------------------------------

local overall_n "ri_eng_n ri_bil_n ri_diff_n" 
local overall_c "ri_eng_c ri_bil_c ri_diff_c" 
local group_n "ri_other_eng_n ri_other_bil_n ri_other_diff_n ri_mex_eng_n ri_mex_bil_n ri_mex_diff_n ri_mex_eng_h_n ri_mex_bil_h_n ri_mexother_diff_diff_n"
local group_c "ri_other_eng_c ri_other_bil_c ri_other_diff_c ri_mex_eng_c ri_mex_bil_c ri_mex_diff_c ri_mex_eng_h_c ri_mex_bil_h_c ri_mexother_diff_diff_c"


tempname ri_estimates 
postfile `ri_estimates' ri_iter `overall_n' `overall_c' `group_n' `group_c' using ri_estimates.dta, replace		

timer on 1

global ri_iterations = 1000

forval ri = 1/$ri_iterations {

*----------------------------------
* Randomization
*----------------------------------

* Balance by 
local balancevars "electiondayage female reg_year g12 g08 cd_2 cd_3 cd_4 cd_5 cd_6 cd_7 cd_8 cd_9 cd_10 cd_11 cd_12 cd_13 best_gtop eipv1 eipv2"

* Block by 
local blockvars "multi_hh vbm_hh"

* Change see by RI iteration 
local seed = 814083 + `ri'

* Run VBM HH separately to capture different proportions
	* VBM HH proportions are very slightly off (<0.1 pct pts) due challenge of groups when control was assigned in VBM experiment

gen assign_`ri' = .
	
* VBM HH = 1 (22 groups)	
* acceptance threshold = p>0.8
* Use aggregate option to combine groups
randomize if hh_tag==1 & vbm_hh==1, groups(22) balance(`balancevars') block(`blockvars') seed(`seed') minruns(5) maxruns(100) jointp(0.8) aggregate(4 9 9) replace

* Convert assignment variable in assigned groups
bys hhid: egen assign_hh_`ri' = max(_assignment) 
replace assign_`ri' = 0 if assign_hh_`ri' == 1
replace assign_`ri' = 1 if assign_hh_`ri' == 2
replace assign_`ri' = 2 if assign_hh_`ri' == 3

* NON VBM HH = 1 (20 groups)	
* acceptance threshold = p>0.8
* Use aggregate option to combine groups
randomize if hh_tag==1 & vbm_hh==0 , groups(20) balance(`balancevars') block(`blockvars') seed(`seed') minruns(5) maxruns(100) jointp(0.8) aggregate(2 9 9) replace

* Convert assignment variable in assigned groups
bys hhid: egen assign_hh2_`ri' = max(_assignment) 
replace assign_`ri' = 0 if assign_hh2_`ri' == 1
replace assign_`ri' = 1 if assign_hh2_`ri' == 2
replace assign_`ri' = 2 if assign_hh2_`ri' == 3


bys vbm_hh: tab assign_`ri', mi	

gen english = assign_`ri' == 1
gen bilingual = assign_`ri' == 2


*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Weights by Probability of Assignment
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Necessary because blocks do not have equal probability of assignment
* Weights are inverse probability of assignment

gen nc_wgt = .
replace nc_wgt = 1/(4/22) if vbm_hh==1 & assign_`ri'==0	
replace nc_wgt = 1/(9/22) if vbm_hh==1 & assign_`ri'==1	
replace nc_wgt = 1/(9/22) if vbm_hh==1 & assign_`ri'==2	

replace nc_wgt = 1/(2/20) if vbm_hh==0 & assign_`ri'==0	
replace nc_wgt = 1/(9/20) if vbm_hh==0 & assign_`ri'==1	
replace nc_wgt = 1/(9/20) if vbm_hh==0 & assign_`ri'==2	
	


*----------------------------------
* Treatment effects
*----------------------------------

* Set covariates & block variables for analysis
local balancevars "electiondayage female reg_year g12 g08 cd_2 cd_3 cd_4 cd_5 cd_6 cd_7 cd_8 cd_9 cd_10 cd_11 cd_12 cd_13 best_gtop eipv1 eipv2 multi_hh"

* Overall turnout
reg g16s english bilingual vbm_hh [pweight=nc_wgt], cluster(hhid)
	local ri_eng_n = _b[english]
	local ri_bil_n = _b[bilingual]
	local ri_diff_n = _b[english]-_b[bilingual]

reg g16s english bilingual vbm_hh `balancevars' [pweight=nc_wgt], cluster(hhid)
	local ri_eng_c = _b[english]
	local ri_bil_c = _b[bilingual]
	local ri_diff_c = _b[english]-_b[bilingual]


* Heterogeneity by nation of origin strata (without and with balance variables b/c blocked randomization)
reg g16s english mex_eng bilingual mex_bil mexican vbm_hh [pweight=nc_wgt], cluster(hhid)
	local ri_other_eng_n = _b[english]
	local ri_other_bil_n = _b[bilingual]
	local ri_other_diff_n = _b[english]-_b[bilingual]
	local ri_mex_eng_n = _b[english] + _b[mex_eng]
	local ri_mex_bil_n = _b[bilingual] + _b[mex_bil]
	local ri_mex_diff_n = (_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil])
	local ri_mex_eng_h_n = _b[mex_eng]
	local ri_mex_bil_h_n = _b[mex_bil]
	local ri_mexother_diff_diff_n = (_b[english]-_b[bilingual])-((_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil]))

reg g16s english mex_eng bilingual mex_bil mexican vbm_hh `balancevars' [pweight=nc_wgt], cluster(hhid)
	local ri_other_eng_c = _b[english]
	local ri_other_bil_c = _b[bilingual]
	local ri_other_diff_c = _b[english]-_b[bilingual]
	local ri_mex_eng_c = _b[english] + _b[mex_eng]
	local ri_mex_bil_c = _b[bilingual] + _b[mex_bil]
	local ri_mex_diff_c = (_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil])
	local ri_mex_eng_h_c = _b[mex_eng]
	local ri_mex_bil_h_c = _b[mex_bil]
	local ri_mexother_diff_diff_c = (_b[english]-_b[bilingual])-((_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil]))
	
	
post `ri_estimates' (`ri') (`ri_eng_n') (`ri_bil_n') (`ri_diff_n') ///
	(`ri_eng_c') (`ri_bil_c') (`ri_diff_c') ///
	(`ri_other_eng_n') (`ri_other_bil_n') (`ri_other_diff_n') ///
	(`ri_mex_eng_n') (`ri_mex_bil_n') (`ri_mex_diff_n') ///
	(`ri_mex_eng_h_n') (`ri_mex_bil_h_n') (`ri_mexother_diff_diff_n') ///
	(`ri_other_eng_c') (`ri_other_bil_c') (`ri_other_diff_c') ///
	(`ri_mex_eng_c') (`ri_mex_bil_c') (`ri_mex_diff_c') ///
	(`ri_mex_eng_h_c') (`ri_mex_bil_h_c') (`ri_mexother_diff_diff_c')


	
* clear for next iteration
drop assign_`ri' assign_hh_`ri' assign_hh2_`ri' english bilingual h33_eng h33_bil nc_wgt
	
}
postclose `ri_estimates' 

timer off 1
timer list 1
display "Elapsed time in hours = " r(t1)/(60*60)

*----------------------------------------
* Capture Observed ATEs
*----------------------------------------
use "Replication Data - NC 2016.dta", clear

* Catalist ethnicity code
gen mexican = ethnicity == "Mexican"
gen mex_eng = english * mexican
gen mex_bil = bilingual * mexican
	label var mexican "Catalist: Mexican American"
	label var mex_eng "English * Mexican American"
	label var mex_bil "Bilingual * Mexican American"


*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Weights by Probability of Assignment
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Necessary because blocks do not have equal probability of assignment
* Weights are inverse probability of assignment


gen nc_wgt = .
replace nc_wgt = 1/(4/22) if vbm_hh==1 & assign_tx==0	
replace nc_wgt = 1/(9/22) if vbm_hh==1 & assign_tx==25	
replace nc_wgt = 1/(9/22) if vbm_hh==1 & assign_tx==31	

replace nc_wgt = 1/(2/20) if vbm_hh==0 & assign_tx==0	
replace nc_wgt = 1/(9/20) if vbm_hh==0 & assign_tx==25	
replace nc_wgt = 1/(9/20) if vbm_hh==0 & assign_tx==31	
	
	
	
* Set covariates & block variables for analysis
local balancevars "electiondayage female reg_year g12 g08 cd_2 cd_3 cd_4 cd_5 cd_6 cd_7 cd_8 cd_9 cd_10 cd_11 cd_12 cd_13 best_gtop eipv1 eipv2 multi_hh"

* Overall turnout
reg g16s english bilingual vbm_hh  [pweight=nc_wgt], cluster(hhid)
	* Diff in ATE
	lincom english - bilingual

	local obs_eng_n = _b[english]
	local obs_bil_n = _b[bilingual]
	local obs_diff_n = _b[english]-_b[bilingual]
	*local obs_vbm_hh_n = (_b[vbm_hh])
	*local obs__cons_n = (_b[_cons])

reg g16s english bilingual vbm_hh `balancevars' [pweight=nc_wgt], cluster(hhid)
	* Diff in ATE
	lincom english - bilingual

	local obs_eng_c = _b[english]
	local obs_bil_c = _b[bilingual]
	local obs_diff_c = _b[english]-_b[bilingual]
	/*foreach var in vbm_hh `balancevars' _cons {
		local obs_`var'_c = _b[`var']
		}*/

	di "NO COVARIATES"
	di "English: `obs_eng_n'"
	di "Bilingual: `obs_bil_n'"
	di "Difference: `obs_diff_n'"
	di "-----------------------------------"
	di "WITH COVARIATES"
	di "English: `obs_eng_c'"
	di "Bilingual: `obs_bil_c'"
	di "Difference: `obs_diff_c'"
	/*foreach var in `balancevars' _cons {
		di "`var': `obs_`var'_c'"
		}*/
		

	

* Heterogeneity by nation of origin strata (without and with balance variables b/c blocked randomization)
reg g16s english mex_eng bilingual mex_bil mexican vbm_hh [pweight=nc_wgt], cluster(hhid)
	* CATE
	lincom english + mex_eng
	lincom bilingual + mex_bil
	* Diff in ATE
	lincom english - bilingual
	lincom (english + mex_eng) - (bilingual + mex_bil)
	lincom (english - bilingual) - ((english + mex_eng) - (bilingual + mex_bil))
	
	local obs_other_eng_n = _b[english]
	local obs_other_bil_n = _b[bilingual]
	local obs_other_diff_n = _b[english]-_b[bilingual]
	local obs_mex_eng_n = _b[english] + _b[mex_eng]
	local obs_mex_bil_n = _b[bilingual] + _b[mex_bil]
	local obs_mex_diff_n = (_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil])
	local obs_mex_eng_h_n = _b[mex_eng]
	local obs_mex_bil_h_n = _b[mex_bil]
	local obs_mexother_diff_diff_n = (_b[english]-_b[bilingual])-((_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil]))
	
	
reg g16s english mex_eng bilingual mex_bil mexican vbm_hh `balancevars' [pweight=nc_wgt], cluster(hhid) 
	* CATE
	lincom english + mex_eng
	lincom bilingual + mex_bil
	* Diff in ATE
	lincom english - bilingual
	lincom (english + mex_eng) - (bilingual + mex_bil)
	lincom (english - bilingual) - ((english + mex_eng) - (bilingual + mex_bil))
	
	local obs_other_eng_c = _b[english]
	local obs_other_bil_c = _b[bilingual]
	local obs_other_diff_c = _b[english]-_b[bilingual]
	local obs_mex_eng_c = _b[english] + _b[mex_eng]
	local obs_mex_bil_c = _b[bilingual] + _b[mex_bil]
	local obs_mex_diff_c = (_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil])
	local obs_mex_eng_h_c = _b[mex_eng]
	local obs_mex_bil_h_c = _b[mex_bil]
	local obs_mexother_diff_diff_c = (_b[english]-_b[bilingual])-((_b[english] + _b[mex_eng]) - (_b[bilingual] + _b[mex_bil]))
	

	
*-----------------------------------
* Calculate RI p-values 
*-----------------------------------

* Open file of RI estimates
use ri_estimates.dta, clear

* quantity is local that is list of all quantities of interest
local quantity "eng bil diff other_eng other_bil other_diff mex_eng mex_bil mex_diff mex_eng_h mex_bil_h mexother_diff_diff"

* RI p-values from model without and with covariates (two-sided hypothesis test)

foreach model in "n" "c" {
  foreach outcome in `quantity' {
	gen c_`outcome'_`model' = abs(ri_`outcome'_`model')> abs(`obs_`outcome'_`model'') 
	label var c_`outcome'_`model' "Cases w/ `outcome'_`model' > observed value (two-sided)"
	qui sum c_`outcome'_`model'
		local p_`outcome'_`model' = r(mean)
	display "estimate for `outcome'_`model' = `obs_`outcome'_`model''"
	display "`outcome'_`model' p-value =  `p_`outcome'_`model''"
	disp "-----------------------------------------------------------"
  }
}
	
log close
	
